home *** CD-ROM | disk | FTP | other *** search
- /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
- /* ***** BEGIN LICENSE BLOCK *****
- * Version: MPL 1.1/GPL 2.0/LGPL 2.1
- *
- * The contents of this file are subject to the Mozilla Public License Version
- * 1.1 (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- * http://www.mozilla.org/MPL/
- *
- * Software distributed under the License is distributed on an "AS IS" basis,
- * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
- * for the specific language governing rights and limitations under the
- * License.
- *
- * The Original Code is mozilla.org code.
- *
- * The Initial Developer of the Original Code is
- * Netscape Communications Corporation.
- * Portions created by the Initial Developer are Copyright (C) 1998
- * the Initial Developer. All Rights Reserved.
- *
- * Contributor(s):
- *
- * Alternatively, the contents of this file may be used under the terms of
- * either of the GNU General Public License Version 2 or later (the "GPL"),
- * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
- * in which case the provisions of the GPL or the LGPL are applicable instead
- * of those above. If you wish to allow use of your version of this file only
- * under the terms of either the GPL or the LGPL, and not to allow others to
- * use your version of this file under the terms of the MPL, indicate your
- * decision by deleting the provisions above and replace them with the notice
- * and other provisions required by the GPL or the LGPL. If you do not delete
- * the provisions above, a recipient may use your version of this file under
- * the terms of any one of the MPL, the GPL or the LGPL.
- *
- * ***** END LICENSE BLOCK ***** */
-
- /**
- * MODULE NOTES:
- * @update gess 4/1/98
- *
- * This file contains the declarations for all the HTML-specific token types that
- * our DTDs understand. In fact, the same set of token types is used for XML.
- * Currently we have tokens for text, comments, start and end tags, entities,
- * attributes, style, script and skipped content. Whitespace and newlines also
- * have their own token types, but don't count on them to stay forever.
- *
- * If you're looking for the html tags, they're in a file called nsHTMLTags.h/cpp.
- *
- * Most of the token types have a similar API. They have a method to get the type
- * of token (GetTokenType); those that represent HTML tags also have a method to
- * get the tag type (GetTypeID). In addition, most have a method, called Consume,
- * that lets the token take part in the parsing process. We've also thrown in a
- * few standard debugging methods.
- */
-
- #ifndef HTMLTOKENS_H
- #define HTMLTOKENS_H
-
- #include "nsToken.h"
- #include "nsHTMLTags.h"
- #include "nsString.h"
- #include "nsScannerString.h"
-
- class nsScanner;
-
- /*******************************************************************
- * This enum defines the set of token types that we currently support.
- *
- * eToken_unknown is explicitly 0 and eToken_start is explicitly 1; every
- * later enumerator takes the next consecutive value, so eToken_markupDecl
- * is 12 and eToken_last is 13.
- *
- * eToken_last marks the end of the list (see its inline comment): any new
- * token type has to be inserted before it so that it remains the final
- * enumerator.
- *******************************************************************/
-
- enum eHTMLTokenTypes {
- eToken_unknown=0,
- eToken_start=1, eToken_end, eToken_comment, eToken_entity,
- eToken_whitespace, eToken_newline, eToken_text, eToken_attribute,
- eToken_instruction, eToken_cdatasection, eToken_doctypeDecl, eToken_markupDecl,
- eToken_last //make sure this stays the last token...
- };
-
- enum eHTMLCategory { // presumably classifies HTML elements by content category (inferred from the names); no declaration visible in this header uses it
- eHTMLCategory_unknown=0, // explicit 0; the enumerators below take consecutive values 1..15
- eHTMLCategory_inline,
- eHTMLCategory_block,
- eHTMLCategory_blockAndInline,
- eHTMLCategory_list,
- eHTMLCategory_table,
- eHTMLCategory_tablepart,
- eHTMLCategory_tablerow,
- eHTMLCategory_tabledata,
- eHTMLCategory_head,
- eHTMLCategory_html,
- eHTMLCategory_body,
- eHTMLCategory_form,
- eHTMLCategory_options,
- eHTMLCategory_frameset,
- eHTMLCategory_text // final enumerator, value 15
- };
-
-
- nsresult ConsumeQuotedString(PRUnichar aChar,nsString& aString,nsScanner& aScanner); // presumably reads a quoted run from aScanner into aString, with aChar as the quote character -- TODO confirm in the implementation file
- nsresult ConsumeAttributeText(PRUnichar aChar,nsString& aString,nsScanner& aScanner); // presumably reads attribute text from aScanner into aString -- TODO confirm in the implementation file
- const PRUnichar* GetTagName(PRInt32 aTag); // returns a PRUnichar string for the tag id aTag; ownership and lifetime of the result are not stated in this header
- //PRInt32 FindEntityIndex(nsString& aString,PRInt32 aCount=-1);
-
-
-
- /**
- * This declares the basic token type used in the HTML DTD's.
- * It derives from CToken and is the direct base class of every other
- * token class declared in this header.
- *
- * The container-info accessors defined here are defaults only:
- * GetContainerInfo() always returns eFormUnknown and SetContainerInfo()
- * ignores its argument. CStartToken overrides both.
- *
- * @update gess 3/25/98
- */
- class CHTMLToken : public CToken {
- public:
- virtual ~CHTMLToken();
- CHTMLToken(eHTMLTags aTag); // aTag: HTML tag id for this token (presumably handed on to CToken -- confirm in the implementation file)
-
- virtual eContainerInfo GetContainerInfo(void) const {return eFormUnknown;} // base-class default: always eFormUnknown
- virtual void SetContainerInfo(eContainerInfo aInfo) { } // base-class default: no-op, aInfo is discarded
-
- protected: // no protected members are declared
- };
-
- /**
- * This declares start tokens, which always take the form <xxxx>.
- * This class also knows how to consume related attributes.
- *
- * Besides the usual token API it carries two pieces of per-tag state:
- * an "empty" flag (IsEmpty/SetEmpty) and an eContainerInfo value that
- * IsWellFormed() compares against eWellFormed. SetContainerInfo() stores
- * a new value only while the current one is eFormUnknown, so once a
- * different value has been recorded later calls have no effect.
- *
- * Note that mTextValue is a public data member of this class, and that
- * mAttributed exists only when DEBUG is defined.
- *
- * @update gess 3/25/98
- */
- class CStartToken: public CHTMLToken {
- CTOKEN_IMPL_SIZEOF // macro defined outside this header; presumably supplies the token's size-reporting method -- confirm in nsToken.h
-
- public:
- CStartToken(eHTMLTags aTag=eHTMLTag_unknown);
- CStartToken(const nsAString& aString);
- CStartToken(const nsAString& aName,eHTMLTags aTag);
-
- virtual nsresult Consume(PRUnichar aChar,nsScanner& aScanner,PRInt32 aMode);
- virtual PRInt32 GetTypeID(void);
- virtual PRInt32 GetTokenType(void);
-
- virtual PRBool IsEmpty(void);
- virtual void SetEmpty(PRBool aValue);
-
- virtual const nsSubstring& GetStringValue();
- virtual void GetSource(nsString& anOutputString);
- virtual void AppendSourceTo(nsAString& anOutputString);
-
- // the following info is used to set well-formedness state on start tags...
- virtual eContainerInfo GetContainerInfo(void) const {return mContainerInfo;}
- virtual void SetContainerInfo(eContainerInfo aContainerInfo) {
- if (eFormUnknown==mContainerInfo) { // only overwrite a still-unknown state; any other stored value is kept
- mContainerInfo=aContainerInfo;
- }
- }
- virtual PRBool IsWellFormed(void) const {
- return eWellFormed == mContainerInfo;
- }
-
- nsString mTextValue; // public: declared before the protected: label below
- protected:
- eContainerInfo mContainerInfo;
- PRPackedBool mEmpty;
- #ifdef DEBUG
- PRPackedBool mAttributed; // present in DEBUG builds only, so the member list differs between DEBUG and non-DEBUG builds
- #endif
- };
-
-
- /**
- * This declares end tokens, which always take the
- * form </xxxx>. This class also knows how to consume
- * related attributes.
- *
- * Differences from CStartToken that are visible in this header: the
- * tag-only constructor has no default argument, mTextValue is protected
- * rather than public, and the container-info accessors are not
- * overridden (so the CHTMLToken defaults apply).
- *
- * @update gess 3/25/98
- */
- class CEndToken: public CHTMLToken {
- CTOKEN_IMPL_SIZEOF // macro defined outside this header; presumably supplies the token's size-reporting method -- confirm in nsToken.h
-
- public:
- CEndToken(eHTMLTags aTag);
- CEndToken(const nsAString& aString);
- CEndToken(const nsAString& aName,eHTMLTags aTag);
- virtual nsresult Consume(PRUnichar aChar,nsScanner& aScanner,PRInt32 aMode);
- virtual PRInt32 GetTypeID(void);
- virtual PRInt32 GetTokenType(void);
-
- virtual const nsSubstring& GetStringValue();
- virtual void GetSource(nsString& anOutputString);
- virtual void AppendSourceTo(nsAString& anOutputString);
-
- protected:
- nsString mTextValue;
- };
-
-
- /**
- * This declares comment tokens. Comments are usually
- * not thought of as tokens, but we treat them that way
- * here so that the parser can have a consistent view
- * of all tokens.
- *
- * Two scanner substrings are kept: mComment holds the comment without
- * the MDO/MDC delimiters and mCommentDecl holds it with them (MDO and
- * MDC are the SGML "markup declaration open" and "markup declaration
- * close" delimiters).
- *
- * ConsumeStrictComment() and ConsumeQuirksComment() are non-virtual
- * helpers; presumably Consume() picks one of them based on aMode --
- * TODO confirm in the implementation file.
- *
- * @update gess 3/25/98
- */
- class CCommentToken: public CHTMLToken {
- CTOKEN_IMPL_SIZEOF // macro defined outside this header; presumably supplies the token's size-reporting method -- confirm in nsToken.h
-
- public:
- CCommentToken();
- CCommentToken(const nsAString& aString);
- virtual nsresult Consume(PRUnichar aChar,nsScanner& aScanner,PRInt32 aMode);
- virtual PRInt32 GetTokenType(void);
- virtual const nsSubstring& GetStringValue(void);
- virtual void AppendSourceTo(nsAString& anOutputString);
-
- nsresult ConsumeStrictComment(nsScanner& aScanner);
- nsresult ConsumeQuirksComment(nsScanner& aScanner);
-
- protected:
- nsScannerSubstring mComment; // does not include MDO & MDC
- nsScannerSubstring mCommentDecl; // includes MDO & MDC
- };
-
-
- /**
- * This class declares entity tokens, which always take
- * the form &xxxx;. This class also offers a few utility
- * methods that allow you to easily reduce entities.
- *
- * TranslateToUnicodeStr() exists in two forms: a non-virtual member that
- * takes only the output string, and a static overload that additionally
- * takes a PRInt32 value. ConsumeEntity() is static as well, so it can be
- * called without a CEntityToken instance. The meaning of the PRInt32
- * results is not stated in this header -- confirm in the implementation
- * file before relying on it.
- *
- * @update gess 3/25/98
- */
- class CEntityToken : public CHTMLToken {
- CTOKEN_IMPL_SIZEOF // macro defined outside this header; presumably supplies the token's size-reporting method -- confirm in nsToken.h
-
- public:
- CEntityToken();
- CEntityToken(const nsAString& aString);
- virtual PRInt32 GetTokenType(void);
- PRInt32 TranslateToUnicodeStr(nsString& aString); // instance form: writes into aString
- virtual nsresult Consume(PRUnichar aChar,nsScanner& aScanner,PRInt32 aMode);
- static nsresult ConsumeEntity(PRUnichar aChar, nsString& aString,
- nsScanner& aScanner);
- static PRInt32 TranslateToUnicodeStr(PRInt32 aValue,nsString& aString); // static form: takes the value explicitly
-
- virtual const nsSubstring& GetStringValue(void);
- virtual void GetSource(nsString& anOutputString);
- virtual void AppendSourceTo(nsAString& anOutputString);
-
- protected:
- nsString mTextValue;
- };
-
-
- /**
- * Whitespace tokens are used where whitespace can be
- * detected as distinct from text. This allows us to
- * easily skip leading/trailing whitespace when desired.
- *
- * The text is stored as an nsScannerSharedSubstring (the same member
- * type CAttributeToken uses for its value), not as an nsString.
- *
- * @update gess 3/25/98
- */
- class CWhitespaceToken: public CHTMLToken {
- CTOKEN_IMPL_SIZEOF // macro defined outside this header; presumably supplies the token's size-reporting method -- confirm in nsToken.h
-
- public:
- CWhitespaceToken();
- CWhitespaceToken(const nsAString& aString);
- virtual nsresult Consume(PRUnichar aChar,nsScanner& aScanner,PRInt32 aMode);
- virtual PRInt32 GetTokenType(void);
- virtual const nsSubstring& GetStringValue(void);
-
- protected:
- nsScannerSharedSubstring mTextValue;
- };
-
- /**
- * Text tokens contain the normalized form of html text.
- * These tokens are guaranteed not to contain entities,
- * start or end tags, or newlines.
- *
- * The text is held in an nsScannerSubstring. Bind() has two overloads:
- * one takes a scanner plus a start/end iterator pair, the other takes a
- * string.
- *
- * ConsumeCharacterData() and ConsumeParsedCharacterData() are
- * non-virtual helpers. Both receive the scanner and an end tag name
- * (aEndTagName, presumably the closing tag that terminates the data),
- * and both take their last argument (aFlushTokens / aFound) by
- * non-const reference, so the callee can write to it. NOTE(review): the
- * precise meaning of the PRBool flags and of aFlag is not visible in
- * this header -- confirm in the implementation file.
- *
- * @update gess 3/25/98
- */
- class CTextToken: public CHTMLToken {
- CTOKEN_IMPL_SIZEOF // macro defined outside this header; presumably supplies the token's size-reporting method -- confirm in nsToken.h
-
- public:
- CTextToken();
- CTextToken(const nsAString& aString);
- virtual nsresult Consume(PRUnichar aChar,nsScanner& aScanner,PRInt32 aMode);
- virtual PRInt32 GetTokenType(void);
- virtual PRInt32 GetTextLength(void);
- virtual void CopyTo(nsAString& aStr);
- virtual const nsSubstring& GetStringValue(void);
- virtual void Bind(nsScanner* aScanner, nsScannerIterator& aStart,
- nsScannerIterator& aEnd);
- virtual void Bind(const nsAString& aStr);
-
- nsresult ConsumeCharacterData(PRBool aConservativeConsume,
- PRBool aIgnoreComments,
- nsScanner& aScanner,
- const nsAString& aEndTagName,
- PRInt32 aFlag,
- PRBool& aFlushTokens);
-
- nsresult ConsumeParsedCharacterData(PRBool aDiscardFirstNewline,
- PRBool aConservativeConsume,
- nsScanner& aScanner,
- const nsAString& aEndTagName,
- PRInt32 aFlag,
- PRBool& aFound);
-
- protected:
- nsScannerSubstring mTextValue;
- };
-
-
- /**
- * CDATASection tokens contain raw unescaped text content delimited by
- * a ![CDATA[ and ]].
- * XXX Not really a HTML construct - maybe we need a separation
- *
- * The text is kept in an owned nsString. The tag constructor defaults
- * aTag to eHTMLTag_unknown, so it also serves as the default
- * constructor.
- *
- * @update vidur 11/12/98
- */
- class CCDATASectionToken : public CHTMLToken {
- CTOKEN_IMPL_SIZEOF // macro defined outside this header; presumably supplies the token's size-reporting method -- confirm in nsToken.h
-
- public:
- CCDATASectionToken(eHTMLTags aTag = eHTMLTag_unknown);
- CCDATASectionToken(const nsAString& aString);
- virtual nsresult Consume(PRUnichar aChar,nsScanner& aScanner,PRInt32 aMode);
- virtual PRInt32 GetTokenType(void);
- virtual const nsSubstring& GetStringValue(void);
-
- protected:
- nsString mTextValue;
- };
-
-
- /**
- * Declaration tokens contain raw unescaped text content (not really, but
- * right now we use this only for view source).
- * XXX Not really a HTML construct - maybe we need a separation
- *
- * The text is held in an nsScannerSubstring. Presumably this class is
- * the one reported as eToken_markupDecl -- confirm against
- * GetTokenType() in the implementation file.
- */
- class CMarkupDeclToken : public CHTMLToken {
- CTOKEN_IMPL_SIZEOF // macro defined outside this header; presumably supplies the token's size-reporting method -- confirm in nsToken.h
-
- public:
- CMarkupDeclToken();
- CMarkupDeclToken(const nsAString& aString);
- virtual nsresult Consume(PRUnichar aChar,nsScanner& aScanner,PRInt32 aMode);
- virtual PRInt32 GetTokenType(void);
- virtual const nsSubstring& GetStringValue(void);
-
- protected:
- nsScannerSubstring mTextValue;
- };
-
-
- /**
- * Attribute tokens are used to contain attribute key/value
- * pairs wherever they may occur. Typically, they should
- * occur only in start tokens. However, we may expand that
- * ability when XML tokens become commonplace.
- *
- * The key lives in mTextKey and the value in mTextValue. GetKey() and
- * GetValue() are non-virtual inline accessors returning
- * mTextKey.AsString() and mTextValue.str() respectively.
- *
- * mHasEqualWithoutValue is a public flag; presumably it records input
- * of the form key= with nothing after the equals sign -- TODO confirm
- * in the implementation file. mLastAttribute exists only when DEBUG is
- * defined.
- *
- * @update gess 3/25/98
- */
- class CAttributeToken: public CHTMLToken {
- CTOKEN_IMPL_SIZEOF // macro defined outside this header; presumably supplies the token's size-reporting method -- confirm in nsToken.h
-
- public:
- CAttributeToken();
- CAttributeToken(const nsAString& aString);
- CAttributeToken(const nsAString& aKey, const nsAString& aString);
- ~CAttributeToken() {} // empty inline destructor
- virtual nsresult Consume(PRUnichar aChar,nsScanner& aScanner,PRInt32 aMode);
- virtual PRInt32 GetTokenType(void);
- const nsSubstring& GetKey(void) { return mTextKey.AsString(); }
- virtual void SetKey(const nsAString& aKey);
- virtual void BindKey(nsScanner* aScanner, nsScannerIterator& aStart,
- nsScannerIterator& aEnd);
- const nsSubstring& GetValue(void) {return mTextValue.str();}
- virtual void SanitizeKey();
- virtual const nsSubstring& GetStringValue(void);
- virtual void GetSource(nsString& anOutputString);
- virtual void AppendSourceTo(nsAString& anOutputString);
-
- PRPackedBool mHasEqualWithoutValue; // public: declared before the protected: label below
- protected:
- #ifdef DEBUG
- PRPackedBool mLastAttribute; // present in DEBUG builds only, so the member list differs between DEBUG and non-DEBUG builds
- #endif
- nsScannerSharedSubstring mTextValue;
- nsScannerSubstring mTextKey;
- };
-
-
- /**
- * Newline tokens contain, you guessed it, newlines.
- * They consume newline (CR/LF) either alone or in pairs.
- *
- * Unlike the other token classes in this header, this one declares no
- * data members and has only a default constructor. AllocNewline() and
- * FreeNewline() are static; presumably they create and release a shared
- * newline string that GetStringValue() returns -- TODO confirm in the
- * implementation file.
- *
- * @update gess 3/25/98
- */
- class CNewlineToken: public CHTMLToken {
- CTOKEN_IMPL_SIZEOF // macro defined outside this header; presumably supplies the token's size-reporting method -- confirm in nsToken.h
-
- public:
- CNewlineToken();
- virtual nsresult Consume(PRUnichar aChar,nsScanner& aScanner,PRInt32 aMode);
- virtual PRInt32 GetTokenType(void);
- virtual const nsSubstring& GetStringValue(void);
-
- static void AllocNewline();
- static void FreeNewline();
- };
-
-
- /**
- * Instruction tokens. Presumably these represent processing
- * instructions and are reported as eToken_instruction -- TODO confirm
- * against the implementation file.
- * The token text is kept in an owned nsString.
- *
- * @update gess 3/25/98
- */
- class CInstructionToken: public CHTMLToken {
- CTOKEN_IMPL_SIZEOF // macro defined outside this header; presumably supplies the token's size-reporting method -- confirm in nsToken.h
-
- public:
- CInstructionToken();
- CInstructionToken(const nsAString& aString);
- virtual nsresult Consume(PRUnichar aChar,nsScanner& aScanner,PRInt32 aMode);
- virtual PRInt32 GetTokenType(void);
- virtual const nsSubstring& GetStringValue(void);
-
- protected:
- nsString mTextValue;
- };
-
-
- /**
- * This token is generated by the HTML and Expat tokenizers
- * when they see the doctype declaration ("<!DOCTYPE ... >")
- *
- * Both constructors default aTag to eHTMLTag_unknown. This is the only
- * token class in this header that declares SetStringValue(), so its
- * text can be replaced after construction. The text is kept in an
- * owned nsString.
- */
-
- class CDoctypeDeclToken: public CHTMLToken {
- CTOKEN_IMPL_SIZEOF // macro defined outside this header; presumably supplies the token's size-reporting method -- confirm in nsToken.h
-
- public:
- CDoctypeDeclToken(eHTMLTags aTag=eHTMLTag_unknown);
- CDoctypeDeclToken(const nsAString& aString,eHTMLTags aTag=eHTMLTag_unknown);
- virtual nsresult Consume(PRUnichar aChar,nsScanner& aScanner,PRInt32 aMode);
- virtual PRInt32 GetTokenType(void);
- virtual const nsSubstring& GetStringValue(void);
- virtual void SetStringValue(const nsAString& aStr);
-
- protected:
- nsString mTextValue;
- };
-
- #endif
-